*** The Bipolar Voter: On the Effect of Political Polarization on Voter Turnout
*** Replication Do-file
*** Data Operations
*** 12.22.2016

* Working directory: the relative file names used by -saveold-, -append- and
* -merge- below are resolved against this folder.
cd "/Users/mmoral/Dropbox/SUNY Binghamton PhD/Miscellaneous/The Bipolar Voter/" 

* ---- CSES Module 2 --------------------------------------------------------
* v1 refers to the original dataset downloaded from the CSES website.
use "/Users/mmoral/Google Drive/SUNY Binghamton PhD/Michael McDonald/-ONGOING/Polarization/cses2 by MM v1.dta", clear /* VER2007-JUN-27 */

* Give the Module 2 (B-prefixed) variables the names of their Module 3
* (C-prefixed) counterparts so that the two modules can be stacked.
rename (B1004 B1015 B3038* B3045 B4004* B5001* B5002*) ///
       (C1004 C1015 C3011* C3013 C4004* C5001* C5002*)
keep C*
generate module = 2
saveold "cses2 by MM v2.dta", replace

* ---- CSES Module 3, stacked on top of Module 2 ----------------------------
use "/Users/mmoral/Google Drive/SUNY Binghamton PhD/Michael McDonald/-ONGOING/Polarization/cses3 by MM v1.dta", clear
keep C1004 C1015 C3011* C3013 C4004* C5001* C5002*
generate module = 3
append using "cses2 by MM v2.dta"
saveold "cses_merged by MM v1.dta", replace

* ---- Survey identifiers ---------------------------------------------------
* C1004 is cut at "_" into C10041 and C10042; gluing the two pieces back
* together without the separator gives the string survey id -pan-
* (values look like "CHE2007"). -country- and -panel- are numeric group ids.
split C1004, parse("_")
egen double country = group(C10041), label lname(coun)
egen pan = concat(C10041 C10042)
egen double panel = group(pan)

* Dropping surveys in non-European countries and/or European countries with no post-election survey in the CSES Module 3
* NOTE: the Hong Kong 2008 survey is matched as "HKG2008" (same prefix as "HKG2004").
* The id was previously misspelled "HGK2008", which matches no survey, so that
* survey was silently left in the data.
drop if pan=="ALB2005" | pan=="AUS2004" | pan=="AUS2007" | pan=="BEL2003" | pan=="BGR2001" | pan=="BLR2008" | pan=="BRA2002" | ///
pan=="BRA2006" | pan=="BRA2010" | pan=="CAN2004" | pan=="CAN2008" | pan=="CHL2005" | pan=="CHL2009" | pan=="DEU12002" | pan=="DEU22002" | ///
pan=="GBR2005" | pan=="HKG2008" | pan=="HKG2004" | pan=="HUN2002" | pan=="ISR2006" | pan=="ITA2006" | pan=="JPN2004" | pan=="JPN2007" | ///
pan=="KGZ2005" | pan=="KOR2004" | pan=="KOR2008" | pan=="MEX2003" | pan=="MEX2006" | pan=="MEX2009" | pan=="NZL2002" | pan=="NZL2008" | ///
pan=="PER2006" | pan=="PER2011" | pan=="PHL2004" | pan=="PHL2010" | pan=="RUS2004" | pan=="THA2007" | pan=="TUR2011" | pan=="TWN2001" | ///
pan=="TWN2004" | pan=="TWN2008" | pan=="URY2009" | pan=="USA2004" | pan=="USA2008" | pan=="ZAF2009"

* Dropping the oldest post-election survey for countries with more than two post-election surveys in the CSES Module 3
drop if pan=="CZE2002" | pan=="FIN2003" | pan=="ISL2003" | pan=="NLD2002" | pan=="NOR2001" | pan=="POL2001" | pan=="PRT2002"

* Romania 2004 - presidential elections, the vote shares of parties cannot be determined
drop if pan=="ROU2004"

* No political information
drop if pan=="SVN2008"

* Presidential elections & two-rounds
drop if pan=="FRA2002" | pan=="FRA2007"

* Recoding missing data: codes 11-99 on the C3013 / C3011 items and codes
* 101-999 on the C5001 / C5002 / C4004 items become the extended missing
* value .a
recode C3013    (11/99=.a)
recode C3011*   (11/99=.a)
recode C5001_* C5002_* C4004_* (101/999=.a)

* Renaming variables for data operationalization: the one-character suffix
* of every C3011_ / C5001_ / C5002_ item is replaced by a running number,
* which the party loops and the -reshape- further down rely on.
foreach stub in C3011 C5001 C5002 {
    rename `stub'_*? `stub'_#, renumber
}
*saveold "cses_merged by MM v2.dta", replace

** Polarization measures
* Survey-level mean of the respondents' left-right placement of each party.
forvalues k = 1/9 {
    bysort pan: egen double mps`k' = mean(C3011_`k')
    label variable mps`k' "Perceived Left-Right Stance of Party`k'"
}

* Copy of C3013 so that the collapse can return both its standard deviation
* (C3013sd) and its mean (C3013) for every survey.
generate C3013sd = C3013

* One row per survey, then one row per survey x party (party = 1..9).
collapse (sd) C3013sd (mean) mps* C3013 C5001_* C5002_* module panel, by(pan)
reshape long mps C5001_ C5002_, i(pan) j(party)

* Dropping parties without a mean placement, and parties for which both
* vote-share items (C5001_ and C5002_) are missing
drop if missing(mps)
drop if missing(C5001_) & missing(C5002_)

* Parties that do not exist in previous post-election surveys
drop if pan=="CHE2007" & inlist(party, 6, 7, 8)
drop if pan=="CZE2010" & inlist(party, 3, 5)
drop if pan=="DNK2001" & party==6
drop if pan=="DNK2007" & inlist(party, 7, 9)
drop if pan=="ISL2007" & party==6
drop if pan=="ISL2009" & party==5
drop if pan=="POL2005" & inlist(party, 3, 5, 7, 8)
drop if pan=="SWE2006" & inlist(party, 7, 8)

* Actual party polarization: mean position of the parties placed to the
* right of the survey's mean party position minus the mean position of the
* parties placed to its left.
bysort pan: egen double meanps = mean(mps)
bysort pan: egen double meanl  = mean(mps) if mps<meanps
bysort pan: egen double meanr  = mean(mps) if mps>meanps
* meanl / meanr exist only on the rows that satisfied the -if- above;
* -xfill- (user-written) copies them to the other rows of the survey.
xfill meanl, i(panel)
xfill meanr, i(panel)
generate polv1 = meanr-meanl

* Distance between the two most extreme parties (Crepaz 1990)
bysort pan: egen double rmostp = max(mps)
bysort pan: egen double lmostp = min(mps)
generate crepol = rmostp-lmostp

* Sum of weighted party distances from the mean voter -i.e., weighted polarization (Lachat 2008*, Pardos-Prado and Dinas 2010, Dow 2011)
* C5001_ is rescaled from percent to proportion here and is left on the
* proportion scale when this section ends.
replace C5001_ = C5001_/100
generate diff1   = abs(mps-C3013)
generate wediff1 = diff1*C5001_
bysort pan: egen double wepdm = total(wediff1)

* Weighted party-system extremism (Dow 2011*, Ezrow 2011): the weighted
* polarization divided by the survey-level standard deviation of C3013
generate wpsc = wepdm/C3013sd

* Polarization (Esteban & Ray 1994)
* For every ordered pair of distinct parties (x, y) within a survey:
*     er<x><y>prod = size_x^2 * size_y * |position_x - position_y|
* and -estray- is the sum of these terms over all pairs.
*
* Implementation notes
*   - The party-level columns are filled with a single conditional
*     -generate- per party. The earlier version looped over every survey and
*     repeated the same -replace- with an extra pan=="..." condition, which
*     touches exactly the same rows (all rows with a non-empty pan).
*   - Each pairwise term is generated once, directly. The earlier version
*     ran 81 -replace- commands per pair variable although only one of them
*     could ever apply.
*   Results and variable names/order are unchanged.

* Back from proportion to percent (undoes the /100 of the previous section).
replace C5001_=C5001_*100
sort pan party

* Position and size (vote share as a proportion) of party p, defined on the
* row of party p only ...
forvalues p = 1/9 {
    generate party`p'loc  = mps        if party==`p' & !missing(pan)
    generate party`p'size = C5001_/100 if party==`p' & !missing(pan)
}
* ... and then copied to every row of the same survey (user-written -xfill-).
xfill party*loc party*size, i(panel)

* Pairwise terms: defined on the row of party x, using the survey-wide
* columns of party y. The term stays missing when party y has no position or
* size in that survey.
forvalues x = 1/9 {
    forvalues y = 1/9 {
        if `x' != `y' {
            generate er`x'`y'prod = ((C5001_/100)^2)*party`y'size*abs(mps-party`y'loc) if party==`x'
        }
    }
}
xfill er*prod, i(panel)

* -rowtotal- treats the missing pair terms as zero.
egen double estray = rowtotal(er*prod)

* Actual party polarization in t-1
* Back to one row per survey. -collapse- leaves the data sorted by pan, so
* the surveys of one country sit on adjacent rows in chronological order.
collapse (mean) polv1 wpsc wepdm estray crepol, by(pan)

* Cutting pan at "2" leaves the leading country code in pan1.
split pan, parse("2")

* Previous row's polarization, taken only when that row is the same country.
generate polv1pre   = polv1[_n-1] if pan1==pan1[_n-1]
generate polv1delta = polv1-polv1pre

* Dropping the post-election surveys from Module 2
drop pan2
drop if pan=="CHE2003" | pan=="DNK2001" | pan=="ESP2004" | ///
        pan=="IRL2002" | pan=="ISR2003" | pan=="POL2005" | ///
        pan=="PRT2005" | pan=="SVN2004" | pan=="SWE2002"

* Variable labels
label variable pan        "Post-election survey (string)"
label variable polv1      "Party Polarization"
label variable polv1pre   "Party Polarization in t_0"
label variable polv1delta "$\Delta$ Party Polarization"
label variable wpsc       "Weighted Party System Extremism (Dow)"
label variable wepdm      "Weighted Polarization (Lachat)"
label variable estray     "Polarization (Esteban & Ray)"
label variable crepol     "Party Polarization (Crepaz)"
saveold "cses_merged by MM v3.dta", replace

** Data Operations
* Respondent-level Module 3 data, to which the survey-level polarization
* measures saved in "cses_merged by MM v3.dta" are attached.
use "/Users/mmoral/Google Drive/SUNY Binghamton PhD/Michael McDonald/-ONGOING/Polarization/cses3 by MM v1.dta", clear

* Same survey id as in the party-level file: C1004 without its "_".
split C1004, p("_")
egen pan = concat(C10041 C10042)

* The using file was collapsed by(pan) and therefore holds one row per
* survey, so this is a many-to-one merge. m:1 returns the same matches that
* m:m produced here, but it stops with an error if the using file ever
* contains duplicate pan values instead of silently pairing rows by position.
merge m:1 pan using "cses_merged by MM v3.dta"

* Respondents of surveys without polarization measures are discarded.
drop if _merge==1
egen double panel = group(pan)

* Missing-value codes, and numeric suffixes for the party-specific items.
recode C3011* (11/999=.a)
recode C4004* (101/999=.a)
rename C3011_*? C3011_#, renumber
rename C4004_*? C4004_#, renumber

* Parties that are not taken into consideration in computing party polarization variables
* Their placement items are set to .a for every respondent of the survey, so
* that they drop out of the perceived-polarization measures computed below.
foreach p in 6 7 8 {
    replace C3011_`p' = .a if pan=="CHE2007"
}
foreach p in 3 5 {
    replace C3011_`p' = .a if pan=="CZE2010"
}
replace C3011_7 = .a if pan=="DEU2005"
foreach p in 7 8 9 {
    replace C3011_`p' = .a if pan=="DEU2009"
}
foreach p in 7 9 {
    replace C3011_`p' = .a if pan=="DNK2007"
}
foreach p in 7 8 {
    replace C3011_`p' = .a if pan=="ESP2008"
}
replace C3011_6 = .a if pan=="ISL2007"
replace C3011_5 = .a if pan=="ISL2009"
foreach p in 3 5 7 8 {
    replace C3011_`p' = .a if pan=="POL2005"
}
foreach p in 7 8 9 {
    replace C3011_`p' = .a if pan=="SWE2006"
}

* Perceived polarization by individual respondents
* Step 1: survey-level mean placement of every party, and the mean of those
* party means (meanps).
forvalues k = 1/9 {
    bysort pan: egen double meanplr`k' = mean(C3011_`k')
    label variable meanplr`k' "Mean Perceived Left-Right Stand of Party`k'"
}
egen double meanps = rowmean(meanplr*)

* Step 2: l<k> / r<k> flag party k as lying left / right of meanps in the
* survey. lperc<k> / rperc<k> keep the respondent's own placement of party k
* only when the party carries the corresponding flag (missing otherwise).
forvalues k = 1/9 {
    generate l`k' = 1 if meanplr`k'<meanps & meanplr`k'!=.
    generate r`k' = 1 if meanplr`k'>meanps & meanplr`k'!=.
    generate lperc`k' = l`k'*C3011_`k'
    generate rperc`k' = r`k'*C3011_`k'
}

* Step 3: the respondent's mean placement of the right-side parties minus
* his or her mean placement of the left-side parties.
egen double percl = rowmean(lperc*)
egen double percr = rowmean(rperc*)
generate polv1perc = percr-percl

* Perceived polarization by respondents (based on Crepaz 1990)
* For every survey, find the party with the highest and the party with the
* lowest survey-level mean placement (meanplr*), then take the difference
* between the respondent's own placements of these two parties.

* surv: numeric id of the survey; rmostp / lmostp: largest / smallest of the
* survey-level party means on each row.
egen double surv = group(pan)
egen double rmostp = rowmax(meanplr*)
egen double lmostp = rowmin(meanplr*)

* rm / lm: index (1-9) of the party whose mean equals rmostp / lmostp.
* Because x runs upwards and later matches overwrite earlier ones, the
* highest-numbered party wins when several parties tie.
gen rm=.
gen lm=.
qui sum surv
forval i=1/`r(max)' {
forval x=1/9 {
replace rm=`x' if meanplr`x'==rmostp & surv==`i'
replace lm=`x' if meanplr`x'==lmostp & surv==`i'
}
}

* rmostr / lmostr: the respondent's placement of the survey's rightmost /
* leftmost party. meanplr* are constant within a survey, so rm and lm are
* too, and r(mean) returns that single party index, which is spliced into
* the variable name C3011_<index>.
* NOTE(review): if rm or lm is missing for every row of a survey, r(mean) is
* missing and the -replace- refers to a non-existent variable -- confirm
* that this cannot happen for the surveys kept in the sample.
gen rmostr=.
gen lmostr=.
qui sum surv 
forval i=1/`r(max)' {
qui sum rm if surv==`i'
replace rmostr=C3011_`r(mean)' if surv==`i'
qui sum lm if surv==`i'
replace lmostr=C3011_`r(mean)' if surv==`i'
}
gen crepolperc=rmostr-lmostr

* Political knowledge about parties
*   ignorant0: number of non-missing placements among C3011_1 to C3011_4
*   ignorant1: 1 if the perceived polarization is negative, 0 otherwise
*   ignorant2: 1 if the perceived Crepaz distance is negative, 0 otherwise
egen double ignorant0 = rownonmiss(C3011_1-C3011_4)
generate ignorant1 = 1 if polv1perc<0
generate ignorant2 = 1 if crepolperc<0
recode ignorant* (missing=0)

* Perceived polarization (difference)
* Perceived minus actual polarization, standardized within survey. The mean
* and standard deviation of version 2 are computed over, and defined for,
* respondents with ignorant0>=3 and ignorant1==0 only.
generate polv1dif = polv1perc-polv1
bysort panel: egen double sddif = sd(polv1dif)   if ignorant0>=3 & ignorant1==0
bysort panel: egen double mdif  = mean(polv1dif) if ignorant0>=3 & ignorant1==0
generate polv1difv2 = (polv1dif-mdif)/sddif

* Version 3: same standardization, using all respondents of the survey.
bysort panel: egen double sddif2 = sd(polv1dif)
bysort panel: egen double mdif2  = mean(polv1dif)
generate polv1difv3 = (polv1dif-mdif2)/sddif2

* Crepaz-based counterpart of version 2 (restricted to ignorant0>=3 and
* ignorant2==0).
generate crepoldif = crepolperc-crepol
bysort panel: egen double sddif3 = sd(crepoldif)   if ignorant0>=3 & ignorant2==0
bysort panel: egen double mdif3  = mean(crepoldif) if ignorant0>=3 & ignorant2==0
generate crepoldifv2 = (crepoldif-mdif3)/sddif3

* DV: Voter turnout
* vote is built from C3021_1: code 5 becomes 0, codes 7-9 become missing (.a).
recode C3021_1 (5=0) (7/9=.a), gen(vote)

* Validated turnout records
* The reported vote of the respondents listed below (identified by C1009
* within a survey) is overwritten: (0=1) turns a reported abstention into a
* vote, (1=0) turns a reported vote into an abstention.
* The -foreach- lists are separated by blanks only. A comma inside a list
* becomes part of the preceding id, which then matches no respondent.

* Norway 2005: recorded as voters
foreach i in 0000000376 0000001182 0000001405 0000001883 0000002238 0000002859 {
recode vote (0=1) if C1009=="`i'" & pan=="NOR2005"
}

* Norway 2009: recorded as voters
foreach i in 0000000108 0000000251 0000000303 0000000306 0000000429 0000000436 0000000441 0000000474 ///
0000000500 0000000604 0000001004 0000001311 0000001403 0000001411 0000001414 0000001775 {
recode vote (0=1) if C1009=="`i'" & pan=="NOR2009"
}
* Norway 2009: recorded as non-voters
foreach i in 0000000015 0000000051 0000000055 0000000072 0000000075 0000000079 0000000140 0000000169 ///
0000000184 0000000213 0000000247 0000000340 0000000415 0000000433 0000000463 0000000473 0000000554 ///
0000000555 0000000556 0000000559 0000000560 0000000562 0000000624 0000000680 0000000686 0000000695 ///
0000000725 0000000726 0000000743 0000000745 0000000754 0000000761 0000000784 0000000801 0000000804 ///
0000000814 0000000820 0000000826 0000000827 0000000838 0000000842 0000000854 0000000867 0000000880 ///
0000000890 0000000893 0000000900 0000000913 0000000921 0000000950 0000000977 0000000986 0000000998 ///
0000001051 0000001053 0000001058 0000001078 0000001090 0000001108 0000001123 0000001129 0000001146 ///
0000001173 0000001181 0000001216 0000001248 0000001335 0000001353 0000001379 0000001380 0000001436 ///
0000001451 0000001487 0000001501 0000001513 0000001524 0000001530 0000001634 0000001637 0000001656 ///
0000001657 0000001659 0000001665 0000001679 0000001684 0000001687 0000001723 0000001736 0000001769 {
recode vote (1=0) if C1009=="`i'" & pan=="NOR2009"
}

* Sweden 2006: recorded as voters
* (the list used to read "0000201650, 0000221418"; with the comma the first
* id was never matched and that respondent's record was left uncorrected)
foreach i in 0000201650 0000221418 {
recode vote (0=1) if C1009=="`i'" & pan=="SWE2006"
}
* Sweden 2006: recorded as non-voters
foreach i in 0000200019 0000200086 0000200439 0000200573 0000200736 0000200886 0000200915 0000200928 ///
0000201294 0000201345 0000201348 0000201387 0000201501 0000201578 0000201729 0000201801 0000201965 ///
0000202187 0000202399 0000202447 0000221082 0000221086 0000221178 0000221181 0000221265 0000221412 ///
0000221445 0000221501 0000221535 0000221572 0000221587 0000221662 0000221710 0000221746 0000221758 0000221822 {
recode vote (1=0) if C1009=="`i'" & pan=="SWE2006"
}

* Political information
* The three C3036 items are rescored (codes 5-8 become -1, code 9 becomes
* missing) and summed; polinf is missing only when all three items are.
* polinf2 is polinf standardized within survey.
recode C3036_1 C3036_2 C3036_3 (5/8=-1) (9=.a)
egen polinf = rowtotal(C3036_1 C3036_2 C3036_3), missing
bysort pan: egen double mpolinf  = mean(polinf)
bysort pan: egen double sdpolinf = sd(polinf)
generate polinf2 = (polinf-mpolinf)/sdpolinf if polinf!=.


* Effective number of electoral parties at the district-level
* The C4004 items are (1) divided by 100, (2) rescaled so that the
* non-missing items of a row sum to one, and (3) squared; enepdis is the
* inverse of the sum of the squared shares.
* Note that the C4004 items themselves are overwritten along the way.
foreach share of varlist C4004_* {
    replace `share' = `share'/100
}
egen double dsumvote = rowtotal(C4004_*), missing
foreach share of varlist C4004_* {
    replace `share' = `share'*(1/dsumvote)
    replace `share' = `share'^2
}
egen double dsumvotesq = rowtotal(C4004_*), missing
generate enepdis = 1/dsumvotesq

* Strong partisan attachment
* Count the items C3009_A to C3009_I on which the respondent's score is 9 or
* 10; strpartisan is 1 when exactly one item qualifies and 0 otherwise.
generate strpartisan = 0
foreach item of varlist C3009_A-C3009_I {
    replace strpartisan = strpartisan + inrange(`item', 9, 10)
}
replace strpartisan = 0 if strpartisan!=1

* Income
* Codes 6-9 are missing; -wridit- (user-written) then produces the
* within-survey score stored in inc.
recode C2020 (6/9=.a), generate(temp1)
wridit temp1, gen(inc) by(pan)

* Age
recode C2001 (997/999=.a), generate(age)

* Gender
recode C2002 (9=.a) (1=0) (2=1), generate(female)

* Education
* The original categories are first grouped into six ordered levels (0-5).
recode C2003 (1/2=0) (3/4=1) (5=2) (6/7=3) (8=4) (9=5) (97/99=.a), generate(temp2)
wridit temp2, gen(educ) by(pan)

* Union membership
recode C2005 (2=0) (7/9=.a), generate(union)

* Ideological extremity
* Absolute distance of the respondent's C3013 score from the mean of his or
* her C1003 group, in units of that group's standard deviation.
recode C3013 (95/99=.a), generate(idst)
bysort C1003: egen double meanidst = mean(idst)
bysort C1003: egen double sdidst   = sd(idst)
replace idst = abs(idst-meanidst)/sdidst

* All kinds of missing values on these four variables are unified to .a
recode idst enepdis inc educ (missing=.a) 

* Dropping post-election surveys with missing values
* The table is printed for inspection only; the two surveys dropped below
* lack one covariate entirely (see the inline notes).
tabstat enepdis inc educ age female union idst strpartisan, by(pan)
drop if pan=="ESP2008" /* (union is missing) */ | pan=="LVA2010" /* (inc is missing) */

* Variable labels 
* The district-level ENEP variable is referred to by its full name, enepdis.
* The abbreviation enepd used previously only resolves while Stata's
* variable-abbreviation setting is on.
lab var vote "Turnout"
lab var pan "Election" 
lab var pan1 "Country" 
lab var enepdis "Eff. Number of Electoral Parties (District)"
lab var polinf "Political Information (unstandardized)"
lab var polinf2 "Political Information"
lab var inc "Income"
lab var age "Age"
lab var female "Gender"
lab var educ "Education"
lab var union "Union Membership"
lab var idst "Ideological Extremity" 
lab var strpartisan "Strong Partisan Attachment"
lab var polv1perc "Perceived Party Polarization"
lab var polv1dif "Perceived Polarization (Dif. Unstandardized)"
lab var polv1difv2 "Perceived Par. Polarization (Dif.)"
lab var polv1difv3 "Perceived Par. Polarization (Dif.)-All Rs"
lab var crepolperc "Perceived Par. Polarization (Crepaz)"
lab var crepoldif "Perceived Par. Polarization (Dif. Unstandardized/Crepaz)" 
lab var crepoldifv2 "Perceived Par. Polarization (Dif./Crepaz)" 
lab var ignorant0 "Information about the Largest Four Parties"
lab var ignorant1 "Knowledge about the Left-Right Policy Space"
lab var ignorant2 "Knowledge about the Most Extreme Parties"

* Creating the replication dataset
* Only the analysis variables are kept (wildcards pick up every polinf*,
* polv*, crep*, wpsc*, wepdm*, estray*, ignorant* and en* variable).
keep vote polinf* inc age female educ union idst strpartisan polv* crep* wpsc* wepdm* estray* ignorant* pan pan1 en*
* The logit is run quietly; it serves only to define e(sample), i.e. the
* respondents with complete data on all model variables.
qui logit vote c.polv1difv2##c.polinf2 polv1 enepdis inc educ age female union idst strpartisan
* Survey-level means of polarization, its change and turnout in that sample.
tabstat polv1 polv1delta vote if e(sample), by(pan)
* One indicator variable (p_1, p_2, ...) per survey present in the sample.
tab pan if e(sample), gen(p_)
* NOTE(review): p_10 is a positional name -- it is the 10th survey in the
* sorted list of surveys in e(sample). Any change to the set of surveys in
* the sample shifts the numbering; confirm that p_10 is still the intended
* survey after such a change.
drop p_10 /* Finland 2007 is the omitted post-election survey with closest turnout and polarization scores to their respective means */
saveold "Replication Data.dta", replace
